# Import libraries 
library(spotifyr)
library(tidyverse)
library(knitr)
library(ggbiplot)
library(ggplot2)
library(plotly)
library(grid)
library(gridExtra)
library(ggfortify)
library(factoextra)
# Configure Spotify API access ----
# Credentials are read from the environment instead of being hard-coded, so
# that secrets never end up in version control. Define SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET before running this script (e.g. in ~/.Renviron).
# NOTE(review): the client id/secret that used to be embedded here should be
# treated as leaked and rotated in the Spotify developer dashboard.
spotify_credentials <- Sys.getenv(
  c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET")
)
if (!all(nzchar(spotify_credentials))) {
  stop(
    "Set the SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET environment ",
    "variables before running this script.",
    call. = FALSE
  )
}
# get_spotify_access_token() picks the credentials up from the environment.
access_token <- get_spotify_access_token()
# Retrieve the tracks of my playlists together with their audio features ----
myplaylists <- get_user_playlists("mandelbrotian")
playlist_tracks <- get_playlist_tracks(myplaylists)
## [1] 1
## [1] 1
## [1] 2
playlist_audio_features <- get_track_audio_features(playlist_tracks)

# Put track metadata and audio features side by side.
# NOTE(review): cbind() assumes both tables have the same rows in the same
# order -- TODO confirm.
playlist_tracks_features <- cbind(playlist_tracks, playlist_audio_features)

# Drop redundant columns, selected by position
redundant_cols <- c(1, 2, 7, 8, 20)
clean_playlist_tracks_features <- playlist_tracks_features[-redundant_cols]
head(clean_playlist_tracks_features)

# Keep only 6 of the audio features (selected by position) for the analysis
playlist_features_reduced <- playlist_audio_features[, c(1:2, 6:9)]

# Find the principal components of my Spotify playlist ----
# Every feature is centred and scaled before the decomposition.
myplaylist.pca <- prcomp(
  playlist_features_reduced,
  center = TRUE,
  scale. = TRUE
)
# Show the first two elements of the PCA result (sdev and rotation)
head(myplaylist.pca, n = 2)
## $sdev
## [1] 1.3623127 1.1491311 0.9687259 0.9097068 0.8503020 0.5784393
## 
## $rotation
##                          PC1         PC2         PC3        PC4
## danceability      0.40499324 -0.26417189  0.15710641  0.7506265
## energy           -0.64982129 -0.06084257  0.08202058  0.1615719
## speechiness       0.02639816 -0.64202610 -0.37403784  0.1806236
## acousticness      0.60152161 -0.01468836 -0.20137075 -0.4245506
## instrumentalness -0.07004461  0.43034274 -0.83994292  0.2921733
## liveness         -0.21513114 -0.57350088 -0.28745713 -0.3349940
##                          PC5         PC6
## danceability     -0.37429004 -0.19487953
## energy            0.07908399 -0.73139820
## speechiness       0.63656717  0.09674020
## acousticness      0.06011256 -0.64307781
## instrumentalness -0.13676758 -0.01800442
## liveness         -0.65277563  0.06202221
# Keep only the scores on the first 2 principal components
my_pca12 <- myplaylist.pca$x[, c("PC1", "PC2")]
# Print the scores as a data frame (the converted object is not stored)
data.frame(my_pca12)
# Find the optimal number of clusters ----

# Elbow method (within sum of squares)
fviz_nbclust(my_pca12, FUNcluster = kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

# Silhouette method
fviz_nbclust(my_pca12, FUNcluster = kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette method")

# Cluster the PC1/PC2 scores into 3 groups with k-means ----
# k-means starts from randomly chosen centres, so the seed is fixed first.
# Without it the cluster numbering (and therefore the `group` colours used
# further down) can change from one run to the next.
set.seed(123)
my_km3 <- kmeans(my_pca12, centers = 3, nstart = 25)
my_km3
# NOTE: the printout below was pasted from an earlier, unseeded run, so the
# cluster numbering of a fresh run may differ from it.
## K-means clustering with 3 clusters of sizes 62, 9, 30
## 
## Cluster means:
##          PC1        PC2
## 1 -0.8474000  0.2108313
## 2  0.3474326 -2.6785032
## 3  1.6470636  0.3678330
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 1 1 3 3 3 1 1 1 3 1 3 3 1 2 3 1 3 1
##  [36] 2 1 1 1 1 2 1 3 1 3 1 1 1 1 3 3 1 1 2 1 2 1 1 3 1 1 3 3 1 1 3 3 3 1 3
##  [71] 3 1 1 3 1 3 1 1 1 1 3 1 3 1 1 3 3 1 2 1 1 2 3 2 1 1 1 3 1 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 61.14582 19.23767 38.87967
##  (between_SS / total_SS =  62.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"
# Attach each track's cluster assignment to the full set of PCA scores
df_out <- as.data.frame(myplaylist.pca$x)
df_out[["group"]] <- my_km3[["cluster"]]
head(df_out)
# Combine the PCA data frame with the song playlist data frame
my_spotify_full_df <- cbind(clean_playlist_tracks_features, df_out)
head(my_spotify_full_df)
# Plot a fresh 3-centre k-means fit of the reduced playlist features,
# drawing the labelled feature loadings on top.
autoplot(
  kmeans(playlist_features_reduced, centers = 3),
  data = playlist_features_reduced,
  frame = FALSE,
  frame.type = "norm",
  loadings = TRUE,
  loadings.label = TRUE
) +
  labs(title = "Bryn's Top Songs, All Features")

# Interactive PC1 vs PC2 scatter plot of my playlist ----
# The cluster id is turned into a factor before it is mapped to colour.
my_spotify_full_df$group <- as.factor(my_spotify_full_df$group)
# The extra `text` aesthetic carries the track name and artist for each point.
p <- ggplot(
  my_spotify_full_df,
  aes(
    x = PC1,
    y = PC2,
    color = group,
    text = paste("Track Name:", track_name, "\n", "Artist:", artist_name)
  )
) +
  geom_point() +
  ggtitle("Bryn's Top 100")
ggplotly(p)
# Load Spotify's top 100 playlist and repeat the clustering / PCA ----
library(readr)
featuresdf <- read_csv("featuresdf.csv")
# Audio-feature columns, selected by position
reduced_features_spotifydf <- featuresdf[, c(4, 5, 9:13)]
# k-means starts from randomly chosen centres; fix the seed so the cluster
# numbering stored in `group` below is the same on every run.
set.seed(123)
# Note: k-means is fitted on the raw feature columns, whereas the PCA below
# centres and scales them.
spotify_km1 <- kmeans(reduced_features_spotifydf, centers = 4, nstart = 25)
spotify_playlist.pca <- prcomp(
  reduced_features_spotifydf,
  center = TRUE,
  scale. = TRUE
)
spotify_df_out <- as.data.frame(spotify_playlist.pca$x)
spotify_df_out$group <- spotify_km1$cluster
# Preview the Spotify scores. This used to print `df_out` (the scores of my
# own playlist) because of a copy-paste slip.
head(spotify_df_out)
spotify_full_df <- cbind(featuresdf, spotify_df_out)
head(spotify_full_df)
# Feature columns (selected by position) used for the overview plot
spotify_features_reduced <- spotify_full_df[, c(4, 5, 7, 9:13)]

# Plot a fresh 3-centre k-means fit of those features, drawing the labelled
# feature loadings on top.
autoplot(
  kmeans(spotify_features_reduced, centers = 3),
  data = spotify_features_reduced,
  frame = FALSE,
  frame.type = "norm",
  loadings = TRUE,
  loadings.label = TRUE
) +
  labs(title = "Spotify's Top Songs, All Features")

# Interactive PC1 vs PC2 scatter plot of Spotify's top 100 ----
# The cluster id is turned into a factor before it is mapped to colour.
spotify_full_df$group <- as.factor(spotify_full_df$group)
# The extra `text` aesthetic carries name, artists and danceability per point.
p <- ggplot(
  spotify_full_df,
  aes(
    x = PC1,
    y = PC2,
    color = group,
    text = paste(
      "Track Name:", name, "\n",
      "Artist:", artists, "\n",
      "Danceability:", danceability
    )
  )
) +
  geom_point() +
  ggtitle("Spotify's Top 100")
ggplotly(p)